{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e0ac630e-3729-463a-b702-ded49ac520bc",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:05:24.642742Z",
     "iopub.status.busy": "2022-06-08T04:05:24.642467Z",
     "iopub.status.idle": "2022-06-08T04:05:24.900176Z",
     "shell.execute_reply": "2022-06-08T04:05:24.899440Z",
     "shell.execute_reply.started": "2022-06-08T04:05:24.642677Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# 导入包\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e5e5c886-548f-40b2-81ae-493abda261c4",
   "metadata": {},
   "source": [
    "### 分组与聚合\n",
    "\n",
    "<img src=\"images/group.jpg\" alt=\"分组与聚合\" style=\"zoom: 35%;\" />"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "7170b21a-e749-4874-ac7c-5856509a74b9",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:05:28.415393Z",
     "iopub.status.busy": "2022-06-08T04:05:28.414738Z",
     "iopub.status.idle": "2022-06-08T04:05:28.433072Z",
     "shell.execute_reply": "2022-06-08T04:05:28.431823Z",
     "shell.execute_reply.started": "2022-06-08T04:05:28.415367Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model</th>\n",
       "      <th>brand</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Oneplus Ace</td>\n",
       "      <td>Oneplus</td>\n",
       "      <td>2999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>iPhone 13</td>\n",
       "      <td>Apple</td>\n",
       "      <td>5999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Galaxy Z Flip3</td>\n",
       "      <td>Samsung</td>\n",
       "      <td>5999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>iPhone 13 Pro Max</td>\n",
       "      <td>Apple</td>\n",
       "      <td>8999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Oneplus 10</td>\n",
       "      <td>Oneplus</td>\n",
       "      <td>5299</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               model    brand  price\n",
       "0        Oneplus Ace  Oneplus   2999\n",
       "1          iPhone 13    Apple   5999\n",
       "2     Galaxy Z Flip3  Samsung   5999\n",
       "3  iPhone 13 Pro Max    Apple   8999\n",
       "4         Oneplus 10  Oneplus   5299"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 创建实验数据\n",
    "data = pd.DataFrame(\n",
    "    {\n",
    "        \"model\": [\n",
    "            \"Oneplus Ace\",\n",
    "            \"iPhone 13\",\n",
    "            \"Galaxy Z Flip3\",\n",
    "            \"iPhone 13 Pro Max\",\n",
    "            \"Oneplus 10\",\n",
    "        ],\n",
    "        \"brand\": [\"Oneplus\", \"Apple\", \"Samsung\", \"Apple\", \"Oneplus\"],\n",
    "        \"price\": [2999, 5999, 5999, 8999, 5299],\n",
    "    }\n",
    ")\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4be6d98d-129c-40ab-b8fb-7e34f36d3d66",
   "metadata": {
    "tags": []
   },
   "source": [
    "## 一、分组(groupby)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3df5dd06-f3bb-425a-a0c0-17bad5ca3659",
   "metadata": {},
   "source": [
    "### 1.1 分组"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5d530b40-c01c-4653-a038-850e5f677bfe",
   "metadata": {
    "tags": []
   },
   "source": [
    "#### 1.1.1 按单列分组"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a568662a-c4a2-4539-8e53-45c421f9c31b",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:05:35.737389Z",
     "iopub.status.busy": "2022-06-08T04:05:35.737073Z",
     "iopub.status.idle": "2022-06-08T04:05:35.742319Z",
     "shell.execute_reply": "2022-06-08T04:05:35.741614Z",
     "shell.execute_reply.started": "2022-06-08T04:05:35.737365Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.groupby.generic.DataFrameGroupBy"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 根据你设定列标签(by=列标签)对应的元素值进行分组(split)\n",
    "# 返回包含分组名(元素值)和数据块(DataFrame)的二维元组序列(DataFrameGroupBy)\n",
    "groupedData = data.groupby(by=\"brand\") #也可省略写成：groupedData = data.groupby(\"brand\")\n",
    "type(groupedData)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "fb08e2ec-864a-4cbc-8f95-926fbb32aa0c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:05:37.961271Z",
     "iopub.status.busy": "2022-06-08T04:05:37.960938Z",
     "iopub.status.idle": "2022-06-08T04:05:37.988438Z",
     "shell.execute_reply": "2022-06-08T04:05:37.987751Z",
     "shell.execute_reply.started": "2022-06-08T04:05:37.961246Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"8\" halign=\"left\">price</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>brand</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Apple</th>\n",
       "      <td>2.0</td>\n",
       "      <td>7499.0</td>\n",
       "      <td>2121.320344</td>\n",
       "      <td>5999.0</td>\n",
       "      <td>6749.0</td>\n",
       "      <td>7499.0</td>\n",
       "      <td>8249.0</td>\n",
       "      <td>8999.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Oneplus</th>\n",
       "      <td>2.0</td>\n",
       "      <td>4149.0</td>\n",
       "      <td>1626.345597</td>\n",
       "      <td>2999.0</td>\n",
       "      <td>3574.0</td>\n",
       "      <td>4149.0</td>\n",
       "      <td>4724.0</td>\n",
       "      <td>5299.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Samsung</th>\n",
       "      <td>1.0</td>\n",
       "      <td>5999.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5999.0</td>\n",
       "      <td>5999.0</td>\n",
       "      <td>5999.0</td>\n",
       "      <td>5999.0</td>\n",
       "      <td>5999.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        price                                                             \n",
       "        count    mean          std     min     25%     50%     75%     max\n",
       "brand                                                                     \n",
       "Apple     2.0  7499.0  2121.320344  5999.0  6749.0  7499.0  8249.0  8999.0\n",
       "Oneplus   2.0  4149.0  1626.345597  2999.0  3574.0  4149.0  4724.0  5299.0\n",
       "Samsung   1.0  5999.0          NaN  5999.0  5999.0  5999.0  5999.0  5999.0"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看分组详情\n",
    "groupedData.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "1efb5fe4-f968-4b1b-b160-2f5912b93815",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:05:40.006339Z",
     "iopub.status.busy": "2022-06-08T04:05:40.006014Z",
     "iopub.status.idle": "2022-06-08T04:05:40.011974Z",
     "shell.execute_reply": "2022-06-08T04:05:40.011320Z",
     "shell.execute_reply.started": "2022-06-08T04:05:40.006314Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Apple': [1, 3], 'Oneplus': [0, 4], 'Samsung': [2]}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看分组情况\n",
    "groupedData.groups"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "2ae47c29-2645-4dee-8777-08f4f1d8194d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:05:40.991770Z",
     "iopub.status.busy": "2022-06-08T04:05:40.991455Z",
     "iopub.status.idle": "2022-06-08T04:05:40.996483Z",
     "shell.execute_reply": "2022-06-08T04:05:40.995795Z",
     "shell.execute_reply.started": "2022-06-08T04:05:40.991748Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['Apple', 'Oneplus', 'Samsung'])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看分组名\n",
    "groupedData.groups.keys()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5d39d49d-e082-4149-a945-90fd1e42e985",
   "metadata": {
    "tags": []
   },
   "source": [
    "#### 1.1.2 按多列分组"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "818618d5-f2f7-4652-b11c-a816f8310fef",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:05:42.700916Z",
     "iopub.status.busy": "2022-06-08T04:05:42.700593Z",
     "iopub.status.idle": "2022-06-08T04:05:42.706250Z",
     "shell.execute_reply": "2022-06-08T04:05:42.705394Z",
     "shell.execute_reply.started": "2022-06-08T04:05:42.700891Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.groupby.generic.DataFrameGroupBy"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 根据你设定列标签(by=[列标签])对应的元素值进行分组(split)\n",
    "# 返回包含分组名(元素值)和数据块(DataFrame)的二维元组序列(DataFrameGroupBy)\n",
    "# 分组带有层次索引\n",
    "groupedData2 = data.groupby(by=[\"brand\",\"model\"]) #也可省略写成：groupedData = data.groupby([\"brand\",\"model\"])\n",
    "type(groupedData2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "29215f79-c44e-4999-8447-97bc5f2b0ed7",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:05:44.471391Z",
     "iopub.status.busy": "2022-06-08T04:05:44.471055Z",
     "iopub.status.idle": "2022-06-08T04:05:44.479108Z",
     "shell.execute_reply": "2022-06-08T04:05:44.478382Z",
     "shell.execute_reply.started": "2022-06-08T04:05:44.471365Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{('Apple', 'iPhone 13'): [1], ('Apple', 'iPhone 13 Pro Max'): [3], ('Oneplus', 'Oneplus 10'): [4], ('Oneplus', 'Oneplus Ace'): [0], ('Samsung', 'Galaxy Z Flip3'): [2]}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看分组情况\n",
    "groupedData2.groups"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "e47a74af-87dc-4ac7-a665-558a83730bf5",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:05:44.890806Z",
     "iopub.status.busy": "2022-06-08T04:05:44.890473Z",
     "iopub.status.idle": "2022-06-08T04:05:44.895167Z",
     "shell.execute_reply": "2022-06-08T04:05:44.894493Z",
     "shell.execute_reply.started": "2022-06-08T04:05:44.890782Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys([('Apple', 'iPhone 13'), ('Apple', 'iPhone 13 Pro Max'), ('Oneplus', 'Oneplus 10'), ('Oneplus', 'Oneplus Ace'), ('Samsung', 'Galaxy Z Flip3')])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看分组名\n",
    "groupedData2.groups.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "06518c0d-fe40-4991-be81-e0adbe5e3c2b",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:05:45.469462Z",
     "iopub.status.busy": "2022-06-08T04:05:45.469074Z",
     "iopub.status.idle": "2022-06-08T04:05:45.501555Z",
     "shell.execute_reply": "2022-06-08T04:05:45.500778Z",
     "shell.execute_reply.started": "2022-06-08T04:05:45.469436Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"8\" halign=\"left\">price</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>brand</th>\n",
       "      <th>model</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Apple</th>\n",
       "      <th>iPhone 13</th>\n",
       "      <td>1.0</td>\n",
       "      <td>5999.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5999.0</td>\n",
       "      <td>5999.0</td>\n",
       "      <td>5999.0</td>\n",
       "      <td>5999.0</td>\n",
       "      <td>5999.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>iPhone 13 Pro Max</th>\n",
       "      <td>1.0</td>\n",
       "      <td>8999.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8999.0</td>\n",
       "      <td>8999.0</td>\n",
       "      <td>8999.0</td>\n",
       "      <td>8999.0</td>\n",
       "      <td>8999.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Oneplus</th>\n",
       "      <th>Oneplus 10</th>\n",
       "      <td>1.0</td>\n",
       "      <td>5299.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5299.0</td>\n",
       "      <td>5299.0</td>\n",
       "      <td>5299.0</td>\n",
       "      <td>5299.0</td>\n",
       "      <td>5299.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Oneplus Ace</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2999.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2999.0</td>\n",
       "      <td>2999.0</td>\n",
       "      <td>2999.0</td>\n",
       "      <td>2999.0</td>\n",
       "      <td>2999.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Samsung</th>\n",
       "      <th>Galaxy Z Flip3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>5999.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5999.0</td>\n",
       "      <td>5999.0</td>\n",
       "      <td>5999.0</td>\n",
       "      <td>5999.0</td>\n",
       "      <td>5999.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                          price                                              \\\n",
       "                          count    mean std     min     25%     50%     75%   \n",
       "brand   model                                                                 \n",
       "Apple   iPhone 13           1.0  5999.0 NaN  5999.0  5999.0  5999.0  5999.0   \n",
       "        iPhone 13 Pro Max   1.0  8999.0 NaN  8999.0  8999.0  8999.0  8999.0   \n",
       "Oneplus Oneplus 10          1.0  5299.0 NaN  5299.0  5299.0  5299.0  5299.0   \n",
       "        Oneplus Ace         1.0  2999.0 NaN  2999.0  2999.0  2999.0  2999.0   \n",
       "Samsung Galaxy Z Flip3      1.0  5999.0 NaN  5999.0  5999.0  5999.0  5999.0   \n",
       "\n",
       "                                   \n",
       "                              max  \n",
       "brand   model                      \n",
       "Apple   iPhone 13          5999.0  \n",
       "        iPhone 13 Pro Max  8999.0  \n",
       "Oneplus Oneplus 10         5299.0  \n",
       "        Oneplus Ace        2999.0  \n",
       "Samsung Galaxy Z Flip3     5999.0  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看分组详情\n",
    "groupedData2.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f8c5840f-6c34-40de-a3e9-712051087e59",
   "metadata": {},
   "source": [
    "### 1.2 遍历分组内容"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "812b900d-f472-4eca-ac81-3a625ea52a37",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:05:47.840168Z",
     "iopub.status.busy": "2022-06-08T04:05:47.839830Z",
     "iopub.status.idle": "2022-06-08T04:05:47.848315Z",
     "shell.execute_reply": "2022-06-08T04:05:47.847429Z",
     "shell.execute_reply.started": "2022-06-08T04:05:47.840143Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('Apple',                model  brand  price\n",
      "1          iPhone 13  Apple   5999\n",
      "3  iPhone 13 Pro Max  Apple   8999)\n",
      "=========\n",
      "('Oneplus',          model    brand  price\n",
      "0  Oneplus Ace  Oneplus   2999\n",
      "4   Oneplus 10  Oneplus   5299)\n",
      "=========\n",
      "('Samsung',             model    brand  price\n",
      "2  Galaxy Z Flip3  Samsung   5999)\n",
      "=========\n"
     ]
    }
   ],
   "source": [
    "# 以一个变量接受分组\n",
    "# 每个分组都是一个二维元组：\n",
    "# 第一个元素是分组名（字符串）\n",
    "# 第二个元素是代码块（DataFrame对象）\n",
    "for group in groupedData:\n",
    "    print(group)\n",
    "    print(\"===\"*3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "cd0a8c82-1082-4372-bffd-2af228d40e54",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:05:48.544111Z",
     "iopub.status.busy": "2022-06-08T04:05:48.543800Z",
     "iopub.status.idle": "2022-06-08T04:05:48.552063Z",
     "shell.execute_reply": "2022-06-08T04:05:48.551362Z",
     "shell.execute_reply.started": "2022-06-08T04:05:48.544088Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Apple\n",
      "---------\n",
      "               model  brand  price\n",
      "1          iPhone 13  Apple   5999\n",
      "3  iPhone 13 Pro Max  Apple   8999\n",
      "=========\n",
      "Oneplus\n",
      "---------\n",
      "         model    brand  price\n",
      "0  Oneplus Ace  Oneplus   2999\n",
      "4   Oneplus 10  Oneplus   5299\n",
      "=========\n",
      "Samsung\n",
      "---------\n",
      "            model    brand  price\n",
      "2  Galaxy Z Flip3  Samsung   5999\n",
      "=========\n"
     ]
    }
   ],
   "source": [
    "# 以两个变量接受分组\n",
    "# 第一个变量是分组名（字符串）\n",
    "# 第二个变量是代码块（DataFrame对象）\n",
    "for name,gData in groupedData:\n",
    "    print(name)\n",
    "    print(\"---\"*3)\n",
    "    print(gData)\n",
    "    print(\"===\"*3)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bd22862b-4afa-4ab1-a369-e59df6ae69b1",
   "metadata": {},
   "source": [
    "### 1.3 选取数据块"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "746eb116-6316-459b-b475-204d41b9d413",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:05:51.291113Z",
     "iopub.status.busy": "2022-06-08T04:05:51.290650Z",
     "iopub.status.idle": "2022-06-08T04:05:51.298845Z",
     "shell.execute_reply": "2022-06-08T04:05:51.298172Z",
     "shell.execute_reply.started": "2022-06-08T04:05:51.291086Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model</th>\n",
       "      <th>brand</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>iPhone 13</td>\n",
       "      <td>Apple</td>\n",
       "      <td>5999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>iPhone 13 Pro Max</td>\n",
       "      <td>Apple</td>\n",
       "      <td>8999</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               model  brand  price\n",
       "1          iPhone 13  Apple   5999\n",
       "3  iPhone 13 Pro Max  Apple   8999"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 方法一\n",
    "# 1. DataFrameGroupBy->list->dict\n",
    "# 2. 使用字典操作来选取数据块\n",
    "dict(list(groupedData))[\"Apple\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "04a13027-65bf-44b1-ac26-69ea0953dbe5",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:05:52.771874Z",
     "iopub.status.busy": "2022-06-08T04:05:52.771545Z",
     "iopub.status.idle": "2022-06-08T04:05:52.779350Z",
     "shell.execute_reply": "2022-06-08T04:05:52.778609Z",
     "shell.execute_reply.started": "2022-06-08T04:05:52.771849Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model</th>\n",
       "      <th>brand</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>iPhone 13</td>\n",
       "      <td>Apple</td>\n",
       "      <td>5999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>iPhone 13 Pro Max</td>\n",
       "      <td>Apple</td>\n",
       "      <td>8999</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               model  brand  price\n",
       "1          iPhone 13  Apple   5999\n",
       "3  iPhone 13 Pro Max  Apple   8999"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 方法二\n",
    "# 使用分组对象自带的get_group函数\n",
    "groupedData.get_group(\"Apple\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a5f18102-c0b5-4a61-a8d2-b257e970cbe4",
   "metadata": {},
   "source": [
    "## 二、聚合"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "037e1154-a48f-4a03-9336-dd5c1b2c4e8c",
   "metadata": {},
   "source": [
    "### 2.1 聚合函数"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fedf6a63-b500-4c42-bc25-14ac2397acbc",
   "metadata": {},
   "source": [
    "<img src=\"images/jh.jpg\" alt=\"聚合函数\" style=\"zoom: 35%;\" />"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "163d1806-a5f0-40a6-b43c-9b9851289fbc",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T03:36:43.907783Z",
     "iopub.status.busy": "2022-06-08T03:36:43.907403Z",
     "iopub.status.idle": "2022-06-08T03:36:43.911247Z",
     "shell.execute_reply": "2022-06-08T03:36:43.910501Z",
     "shell.execute_reply.started": "2022-06-08T03:36:43.907762Z"
    },
    "tags": []
   },
   "source": [
    "### 2.2 常规聚合操作形式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "761d4c73-5332-4005-b831-cd45df3001eb",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:05:57.509595Z",
     "iopub.status.busy": "2022-06-08T04:05:57.509268Z",
     "iopub.status.idle": "2022-06-08T04:05:57.515570Z",
     "shell.execute_reply": "2022-06-08T04:05:57.514823Z",
     "shell.execute_reply.started": "2022-06-08T04:05:57.509570Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "brand\n",
       "Apple      7499.0\n",
       "Oneplus    4149.0\n",
       "Samsung    5999.0\n",
       "Name: price, dtype: float64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 例如：根据分组求均值（根据品牌求手机均价）\n",
    "# 选中分组对象里要操作的字段（例如：price），执行聚合函数（例如：mean）\n",
    "groupedData[\"price\"].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "0e1b530a-3a9b-4f62-b13b-d8d8aa76fc17",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:05:58.085346Z",
     "iopub.status.busy": "2022-06-08T04:05:58.084798Z",
     "iopub.status.idle": "2022-06-08T04:05:58.091624Z",
     "shell.execute_reply": "2022-06-08T04:05:58.090914Z",
     "shell.execute_reply.started": "2022-06-08T04:05:58.085320Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "brand\n",
       "Apple      7499.0\n",
       "Oneplus    4149.0\n",
       "Samsung    5999.0\n",
       "Name: price, dtype: float64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 完整形式\n",
    "# 例如：根据分组求均值（根据品牌求手机均价）\n",
    "# 选中分组对象里要操作的字段（例如：price），执行聚合函数（例如：mean）\n",
    "# 默认情况下会一分组名为行索引\n",
    "data.groupby(by=\"brand\")[\"price\"].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "0cb8cefc-a311-4cee-9774-9ae6a774abcd",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:05:59.782018Z",
     "iopub.status.busy": "2022-06-08T04:05:59.781687Z",
     "iopub.status.idle": "2022-06-08T04:05:59.789458Z",
     "shell.execute_reply": "2022-06-08T04:05:59.788699Z",
     "shell.execute_reply.started": "2022-06-08T04:05:59.781993Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "brand    model            \n",
       "Apple    iPhone 13            5999.0\n",
       "         iPhone 13 Pro Max    8999.0\n",
       "Oneplus  Oneplus 10           5299.0\n",
       "         Oneplus Ace          2999.0\n",
       "Samsung  Galaxy Z Flip3       5999.0\n",
       "Name: price, dtype: float64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 完整形式\n",
    "# 例如：根据分组求均值（根据品牌求手机均价）\n",
    "# 选中分组对象里要操作的字段（例如：price），执行聚合函数（例如：mean）\n",
    "# 默认情况下会一分组名为行索引\n",
    "data.groupby(by=[\"brand\",\"model\"])[\"price\"].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "8006e10f-c1c8-4986-8163-22cae414df09",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:06:00.232900Z",
     "iopub.status.busy": "2022-06-08T04:06:00.232359Z",
     "iopub.status.idle": "2022-06-08T04:06:00.241830Z",
     "shell.execute_reply": "2022-06-08T04:06:00.241149Z",
     "shell.execute_reply.started": "2022-06-08T04:06:00.232875Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>brand</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Apple</td>\n",
       "      <td>7499.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Oneplus</td>\n",
       "      <td>4149.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Samsung</td>\n",
       "      <td>5999.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     brand   price\n",
       "0    Apple  7499.0\n",
       "1  Oneplus  4149.0\n",
       "2  Samsung  5999.0"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 手动设置不以分组名为行索引\n",
    "data.groupby(by=\"brand\", as_index=False)[\"price\"].mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5873c2b2-546a-4791-b0b0-4726c618d8fd",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T03:36:43.907783Z",
     "iopub.status.busy": "2022-06-08T03:36:43.907403Z",
     "iopub.status.idle": "2022-06-08T03:36:43.911247Z",
     "shell.execute_reply": "2022-06-08T03:36:43.910501Z",
     "shell.execute_reply.started": "2022-06-08T03:36:43.907762Z"
    },
    "tags": []
   },
   "source": [
    "### 2.3 语法糖聚合操作形式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "209c4f8b-2963-4dd6-9273-0fcf5c90bf95",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:06:02.463805Z",
     "iopub.status.busy": "2022-06-08T04:06:02.463358Z",
     "iopub.status.idle": "2022-06-08T04:06:02.470463Z",
     "shell.execute_reply": "2022-06-08T04:06:02.469722Z",
     "shell.execute_reply.started": "2022-06-08T04:06:02.463780Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "brand\n",
       "Apple      7499.0\n",
       "Oneplus    4149.0\n",
       "Samsung    5999.0\n",
       "Name: price, dtype: float64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 取出DataFrame的某一列数据（例如：price）\n",
    "# 按照DataFrame的另一列数据进行分组（例如：brand）\n",
    "data[\"price\"].groupby(data[\"brand\"]).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "74c9faa5-478e-40ba-adda-c085640ff232",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:06:03.167163Z",
     "iopub.status.busy": "2022-06-08T04:06:03.166846Z",
     "iopub.status.idle": "2022-06-08T04:06:03.174509Z",
     "shell.execute_reply": "2022-06-08T04:06:03.173805Z",
     "shell.execute_reply.started": "2022-06-08T04:06:03.167139Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "brand    model            \n",
       "Apple    iPhone 13            5999.0\n",
       "         iPhone 13 Pro Max    8999.0\n",
       "Oneplus  Oneplus 10           5299.0\n",
       "         Oneplus Ace          2999.0\n",
       "Samsung  Galaxy Z Flip3       5999.0\n",
       "Name: price, dtype: float64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 取出DataFrame的某一列数据（例如：price）\n",
    "# 按照DataFrame的另一列数据进行分组（例如：brand和model）\n",
    "data[\"price\"].groupby([data[\"brand\"],data[\"model\"]]).mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8e3251e6-ab12-4388-abc3-eaa633d0e9ae",
   "metadata": {},
   "source": [
    "### 2.4 自定义聚合函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "b3b0e394-6655-4468-a88b-0b3d5404c573",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:06:08.439677Z",
     "iopub.status.busy": "2022-06-08T04:06:08.439102Z",
     "iopub.status.idle": "2022-06-08T04:06:08.447003Z",
     "shell.execute_reply": "2022-06-08T04:06:08.446275Z",
     "shell.execute_reply.started": "2022-06-08T04:06:08.439652Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "brand\n",
       "Apple      3000\n",
       "Oneplus    2300\n",
       "Samsung       0\n",
       "Name: price, dtype: int64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 例：计算每个手机品牌的差\n",
    "# 自定义函数（例：求差值）\n",
    "def diff(x):\n",
    "    return x.max() - x.min()\n",
    "# 应用自定义函数聚合\n",
    "data.groupby(by=\"brand\")[\"price\"].aggregate(diff)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "57705e8d-eb55-4b63-b5bf-24450114b288",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-06-08T04:06:09.061020Z",
     "iopub.status.busy": "2022-06-08T04:06:09.060702Z",
     "iopub.status.idle": "2022-06-08T04:06:09.068154Z",
     "shell.execute_reply": "2022-06-08T04:06:09.067454Z",
     "shell.execute_reply.started": "2022-06-08T04:06:09.060995Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "brand\n",
       "Apple      3000\n",
       "Oneplus    2300\n",
       "Samsung       0\n",
       "Name: price, dtype: int64"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 例：计算每个手机品牌的差\n",
    "# 应用自定义函数聚合(lanmbda实现)\n",
    "data.groupby(by=\"brand\")[\"price\"].aggregate(lambda x: x.max()-x.min())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
